const fs = require("fs");
const mammoth = require("mammoth");
const path = require("path");

// Input and output file paths.
// Usage: node <script> [input.docx] [output.html]
// Paths given on the command line are resolved against the current working
// directory; when an argument is omitted, the original defaults located next
// to this script are used, so a no-argument run behaves as before.
const [, , inputArg, outputArg] = process.argv;
const inputPath = inputArg
  ? path.resolve(inputArg)
  : path.resolve(__dirname, "./RMET-0124.docx");
const outputPath = outputArg
  ? path.resolve(outputArg)
  : path.resolve(__dirname, "output.html");

// Options passed to mammoth.convertToHtml.
const convertOptions = {
  // Embed every image in the output as a base64 data: URI.
  // imgElement needs a callback that returns the attributes for the generated
  // <img>; without one the converter has nothing to call when it meets an image.
  convertImage: mammoth.images.imgElement((image) =>
    image.read("base64").then((imageBuffer) => ({
      src: `data:${image.contentType};base64,${imageBuffer}`,
    }))
  ),
  styleMap: [
    "p[style-name='Normal'] => p:fresh",
    // Key mapping: turn explicit page breaks into an element carrying the
    // "page-break" class. The document matcher for page breaks is
    // br[type='page'].
    // NOTE(review): a void element (<hr>) is used because mammoth appears to
    // drop non-void elements that end up with no content, which would remove
    // an empty <div>; confirm against the installed mammoth version.
    "br[type='page'] => hr.page-break:fresh",
  ],
};

// Convert the .docx at inputPath to HTML, wrap it in a standalone page that
// carries the page-break CSS, and write the result to outputPath.
//
// mammoth opens the file itself through the `path` input, so a missing or
// unreadable input rejects the promise and is reported by the same .catch as
// conversion and write errors, instead of being thrown from a callback.
mammoth
  .convertToHtml({ path: inputPath }, convertOptions)
  .then((result) => {
    // Surface mammoth's diagnostics (e.g. style mappings it could not parse);
    // they would otherwise be dropped silently.
    result.messages.forEach((message) => {
      console.warn(`[mammoth ${message.type}] ${message.message}`);
    });

    // Wrap the fragment in a full document. The doctype avoids quirks mode and
    // the charset declaration matches the UTF-8 encoding used when writing, so
    // non-ASCII text renders correctly when the file is opened directly.
    // `border: 0` clears any default border so only the dashed top line shows.
    const htmlWithStyle = `<!DOCTYPE html>
<html>
    <head>
        <meta charset="utf-8">
        <style>
            .page-break {
                page-break-before: always;
                break-before: page;
                height: 40px;
                border: 0;
                border-top: 1px dashed #ccc;
                margin: 40px 0;
            }
        </style>
    </head>
    <body>${result.value}</body>
</html>
`;
    // A synchronous write inside .then means a write failure also lands in
    // the .catch below.
    fs.writeFileSync(outputPath, htmlWithStyle, "utf8");
    console.log(`✅ 转换完成，结果保存在 ${outputPath}`);
  })
  .catch((err) => {
    console.error("转换失败：", err);
    // Report failure to the calling shell without cutting off pending output.
    process.exitCode = 1;
  });
